** This code accompanies the LIS Video Tutorial Handling LWS Multiple Imputation
** Ph. Van Kerm, August 2020
** It can be pasted and run in LISSY http://webui.lisdatacenter.org

// Load the dataset referenced by $us16h (replacing anything in memory),
// then look at the implicate identifier and at the analysis variable.
use $us16h , clear
tab inum
codebook anw

// Caveat: in the US SCF the population weight can vary across implicates. We ignore this here:
// within each hid, every record is given the weight found on the record with the lowest inum.
bysort hid (inum) : replace hpopwgt = hpopwgt[1]

// The easy way is to keep a single implicate (inum==1) and analyse it as an ordinary survey dataset.
// Going the extra mile, the very same analysis is repeated on implicates 2 and 3.
// Each pass works on a temporary copy of the data: -preserve- / -restore- bring the full file back.
forvalues k = 1/3 {
	preserve
		keep if inum==`k'
		svyset [pw=hpopwgt] , psu(hid)
		svy : mean anw
		svy : inequaly anw , gini keepnonpositive
	restore
}

// Estimate the mean and the Gini coefficient of anw separately on each implicate,
// then report the simple average of the point estimates across implicates.
// The list of implicates is read from the data instead of being hard-coded as 1/5,
// so the same code works whatever the number of implicates in the file.

// The survey design does not depend on the implicate: declare it once, outside the loop.
svyset [pw=hpopwgt] , psu(hid)

// Implicates are numbered from 1; inum==0 (if present) is the unimputed dataset and is skipped.
quietly levelsof inum if inum>0 , local(implicates)
local M : word count `implicates'

// Running sums are held in scalars to keep full double precision.
tempname summean sumgini
scalar `summean' = 0
scalar `sumgini' = 0

foreach m of local implicates {
	di "Implicate number `m'"
	svy : mean anw  if inum==`m'
	local mean`m' = _b[anw]                    // per-implicate estimate, kept for later inspection
	scalar `summean' = `summean' + _b[anw]
	svy : inequaly anw if inum==`m'  , gini keepnonpositive
	local gini`m' = e(SGini_2)                 // per-implicate estimate, kept for later inspection
	scalar `sumgini' = `sumgini' + e(SGini_2)
}
di "Average mean estimate: " `summean'/`M'
di "Average Gini estimate: " `sumgini'/`M'
	

// Using the -mi- prefix

// Step 1: recreate the unimputed dataset (inum==0).
// Working assumption: if a variable varies across implicates for a given observation, it has been
// imputed, and it is therefore set to missing in the unimputed dataset.
// Note: such an imputation 'tag' has to be generated for each variable used in the analysis.
// Here only 'anw' is used.
bysort hid : egen sdanw = sd(anw)             // standard deviation of anw across implicates
generate imputed = (sdanw>0) if sdanw < .     // 1 if anw varies across implicates, 0 if not, missing if sdanw is missing
tabulate imputed if inum==1

// Duplicate every inum==1 record; 'original' is 0 on the pre-existing record and 1 on the added copy.
expand 2 if inum==1 , generate(original)
// One record of each pair becomes implicate 0. The two are identical at this point, so it does
// not matter which one is picked: here it is the pre-existing record (original==0).
replace inum = 0 if original==0 & inum==1
// In the unimputed dataset, anw is missing wherever it was tagged as imputed.
replace anw = . if imputed==1 & inum==0

// Step 2: 'import' the data into Stata mi flong format (flong corresponds to stacking all implicates).
//   m(inum)       inum is the implicate identifier
//   id(hid)       hid is the unique observation identifier
//   imputed(anw)  anw is declared imputed (more than one variable can be listed)
mi import flong , m(inum) id(hid) imputed(anw) clear

// Step 3: reset the survey information, this time through -mi-, and run a quick check.
mi svyset [pw=hpopwgt] , psu(hid)
mi query

// Ready to go: prefix the survey estimation commands with -mi estimate-
mi estimate : svy : mean anw
mi estimate : svy : inequaly anw , gini keepnonpositive

// Same estimation of the mean, with extra output to check what is happening under the hood
mi estimate , vartable noisily : svy : mean anw

// -mi- has more functionality: -mi xeq- executes commands on selected datasets only
mi xeq 1 : svy : mean anw     // first implicate only
// Unimputed dataset (0) and first implicate (1); several commands are combined with ';'
mi xeq 0 1 : svy : mean anw ; svy : inequaly anw , gini keepnonpositive
	
// -mi estimate- runs with all estimation commands (returning estimates in e(b)) 
// It needs some tweaking to run with other commands 
// See https://www.stata.com/support/faqs/statistics/combine-results-with-multiply-imputed-data/
// altgini: e-class wrapper around -sgini- so that its result can be combined by -mi estimate-.
//   Arguments (positional, separated by blanks):
//     var  the variable passed on to -sgini-
//     wgt  the weight specification, written without embedded blanks, e.g. [pw=hpopwgt]
//   Posts e(b) (one coefficient named Gini, taken from r(coeff)), e(V) (set to 0) and e(N).
// Any earlier definition is dropped first: without this, re-running the script in the same
// Stata session stops with an "already defined" error at -program define-.
capture program drop altgini
program define altgini , eclass
	args var wgt
	sgini `var' `wgt'
	// Pick up the number of observations right away, before any other command can touch r()
	local nobs = r(N)
	tempname b V
	mat `b' = r(coeff)
	mat `V' = 0                    // no variance estimate is supplied: a zero matrix is posted
	mat colnames `b' = Gini
	mat colnames `V' = Gini
	mat rownames `V' = Gini
	ereturn post `b' `V' , obs(`nobs')
	ereturn local cmd altgini
end
// cmdok lets -mi estimate- run a command that is not one of its officially supported estimators
mi estimate , cmdok  : ///
	altgini anw [pw=hpopwgt]
 
 
// Finally, combine multiple imputation with the LWS replication weights to run bootstrap inference

// Step 1: change the mi format from 'long' to 'wide', then check
mi convert wide , clear
mi query

// Step 2: merge in the replication weights, matching on hid
quietly merge 1:1 hid using $us16r

// Step 3: update the -svyset- statement to specify the replication weights as bootstrap weights
// (only 99 replication weights are used here --- specify 999 in your applications!)
mi svyset [pw=hpopwgt] , vce(linearized) bsrweight(hrwgt1-hrwgt99) psu(hid)

// Step 4: et voila
mi estimate , vceok : svy bootstrap : mean anw
mi estimate , vceok : svy bootstrap : inequaly anw , gini keepnonpositive